/* ****************************************
	DESCRIPTION
This  do file creates all output for Paul Clist & Ying-yi Hong's  
Do International Students Learn Foreign Preferences? The Interplay of Language, Identity and Assimilation
Forthcoming at the Journal of Economic Psychology
Code Version 4, May 2023
Our Websites: https://paulclist.github.io/ and https://www.yingyihong.org/ 

			CONTENTS
* Prep: 	Load data, set some locals & the working directory
* Table  2: Summary Stats, Controls
* Table  3: Summary Stats, Outcomes, by group (with p value for test of differences)
* Figure 2: Four Effects by outcome question
* Figure 3: Controls
* Table  4: Four Effects, with MHT correction
* Figure 4: Three Correlations Between Effects 
* Figure 5: Robustness check: Time in the UK
* Table  5: Robustness check: Language Proficiency 
* Table  6: Predicting Revealed Preference with Stated Preference
 
	Data Notes: 
1. 	All outcome and control questions are available in raw form and centered (with suffix _cen) 
2. 	In prep set the number of bootstraps - the default will be quick but wrong, 
	delete one line to be correct but slow
3. 	In prep, set the working directory
4. 	In prep, install the required packages. 
*/ 

**************************************** Prep: Load data, set some locals & the working directory
clear 
version 16

* Set the working directory to wherever you've saved the data. 
cd "C:\Users\ugx10yuu\OneDrive - University of East Anglia\work\datasets\language asia\2023 replication website\"

* Load the data 
use "data_prepared.dta", clear

* The number of bootstraps - important! 
global Nbootstraps 10000 // this line sets the number of bootstraps to be used in all multiple hypothesis testing corrections.  
global Nbootstraps 1 // Delete/Comment this line if you're interested in the SEs from MHT, and do not mind the ~4 hour run time. 
 
* Set gloabls for dependent variables, controls etc. 
global DEPVARS_C 	q1_cen q13_cen- q31_cen
global DEPVARS 		q1 q13 q14 q15 q16 q24 q17 q18 q21 q19  q26 q20 q25 q22  q27 q28 q29 q30 q31
global controls 	female  urban age known religious ses math 
global controls_C 	female  urban age_cen known_cen religious_cen ses_cen math_cen 
global groups 		bornchina mandarin  beijing

* install packages (if not already installed)
* ssc install wyoung 		// MHT corrections
* ssc install esttab 		// easier grouping of results
* ssc install blindschemes 	// if you want Daniel Bischof's great scheme
* ssc install qqvalue 		// more MHT corrections (using only p values as input)

************************************ Table  2: Summary Stats, Controls
gen complete=1	// To measure missing questions, complete=0 means its missing
foreach var of varlist  $controls_C  {
replace complete=0 if `var'==.
}

** We'll do the MHT corrections first. 
qui wyoung $controls complete , cmd(regress OUTCOMEVAR beijing if lang==1 | lang==2, robust) familyp(beijing) bootstraps($Nbootstraps) seed(20)
matrix A=r(table) // Location
qui wyoung $controls complete , cmd(regress OUTCOMEVAR mandarin if lang==2 | lang==3, robust) familyp(mandarin) bootstraps($Nbootstraps) seed(20)
matrix B=r(table) // Language
qui wyoung $controls complete , cmd(regress OUTCOMEVAR bornchina if lang==3 | lang==4, robust) familyp(bornchina) bootstraps($Nbootstraps) seed(20)
matrix C=r(table) // Nationality
qui wyoung $controls complete , cmd(regress OUTCOMEVAR mandarin if lang==1 | lang==4, robust) familyp(mandarin) bootstraps($Nbootstraps) seed(20)
matrix D=r(table) // Country

mat F = J(14,8,.)		// To give the first means and SEs, in a simple matrix. 
qui forvalues i=1/4{
	sum female if lang==`i', d
	local temp1=100*r(mean)
	mat F[1,`i'] = round(`temp1',.1)

	sum urban if lang==`i', d
	local temp2=100*r(mean)
	mat F[2,`i'] = round(`temp2',.1)
	
	local ticker=3
	foreach var of varlist age known religious ses math {
		reg `var' ibn.lang , noconstant
		mat F[`ticker',`i'] =round(_b[`i'.lang],.01)
		local ticker=`ticker'+1
		mat F[`ticker',`i'] =round(_se[`i'.lang],.01)
		local ticker=`ticker'+1
		}
		
	count if complete==1 & lang==`i'
	mat F[13,`i'] =r(N)
	count if  lang==`i'
	mat F[14,`i'] =r(N)
	}

qui forvalues i=1/8 {
local j=`i'

if `i'==4 {
	local j=5 
	}
if `i'==5 {
	local j=7
	}
if `i'==6 {
	local j=9
	}
if `i'==7 {
	local j=11 
	}
if `i'==8 {
	local j=13 
	}
matrix F[`j',5]=A[`i', 4]
matrix F[`j',6]=B[`i', 4]
matrix F[`j',7]=C[`i', 4]
matrix F[`j',8]=D[`i', 4]
}

mat list F




************************************ Table  3: Summary Stats, Outcomes, by group 
eststo clear
qui foreach var in $DEPVARS {
eststo: reg `var' ibn.lang, noconstant  robust
test 1.lang=2.lang=3.lang=4.lang
estadd  scalar  pvalue = r(p)
}  	
* esttab doens't play nicely with transposing with the p value, so we transpose this outside of stata
esttab using temp.csv,  cells(b(fmt(a2)) se(par)) wide  stats(pvalue, fmt(a3)) nostar  replace

************************************ Figure 2: Four Effects by outcome question

set scheme plottig							// Commands for graph style  - can delete if not working for you. 
grstyle init
grstyle set symbolsize  medsmall  
grstyle set linewidth  medium   
grstyle set size 12pt: subheading axis_title
grstyle set size 10.5pt: tick_label  
grstyle color tick_label black
graph set window fontface "Times New Roman"
*graph set window fontface "Helvetica"
grstyle set margin 1mm: twoway
grstyle set margin 1mm: graph  

foreach var of varlist $DEPVARS_C  {					// Getting the marginal effects to plot - easiest using matrices
qui regress `var' ibn.lang $controls_C , noconstant robust 
qui contrast {lang 1 -1 0 0} {lang 0 1 -1 0} {lang 0 0 1 -1 } {lang 1 0 0 -1 }, pveffects
matrix A`var'= r(table)
matrix colnames A`var' = Beijing Mandarin Chinese China
} 

coefplot	(matrix(Aq1_cen[1,]), se(Aq1_cen[2,])) || ///
(matrix(Aq13_cen[1,]), se(Aq13_cen[2,])) || (matrix(Aq14_cen[1,]), se(Aq14_cen[2,])) || ///
(matrix(Aq15_cen[1,]), se(Aq15_cen[2,])) || ///
(matrix(Aq16_cen[1,]), se(Aq16_cen[2,])) || (matrix(Aq24_cen[1,]), se(Aq24_cen[2,])) || /// 
(matrix(Aq17_cen[1,]), se(Aq17_cen[2,])) || (matrix(Aq18_cen[1,]), se(Aq18_cen[2,])) || ///
(matrix(Aq21_cen[1,]), se(Aq21_cen[2,])) || ///
(matrix(Aq19_cen[1,]), se(Aq19_cen[2,])) || (matrix(Aq26_cen[1,]), se(Aq26_cen[2,])) || ///
(matrix(Aq20_cen[1,]), se(Aq20_cen[2,])) || (matrix(Aq25_cen[1,]), se(Aq25_cen[2,])) || ///
(matrix(Aq22_cen[1,]), se(Aq22_cen[2,])) || (matrix(Aq27_cen[1,]), se(Aq27_cen[2,])) || ///
(matrix(Aq28_cen[1,]), se(Aq28_cen[2,])) || (matrix(Aq29_cen[1,]), se(Aq29_cen[2,])) || ///
(matrix(Aq30_cen[1,]), se(Aq30_cen[2,])) || (matrix(Aq31_cen[1,]), se(Aq31_cen[2,]))  ///
,  bycoefs  keep(Beijing Mandarin Chinese China) xtitle("Coefficients and 95% Confidence Intervals, using Centred Data", size(small ))  ytitle("")   xline(0, lp(dash) lc(plr1) )  byopts(  col(7) legend(off) imargin(tiny)  )	///
xlabel(-1(1)1) xtick(-1(.5)1.5, grid)	 ///
coeflabels(Beijing = `" "Location" "' Mandarin = `""Language""' 		Chinese = `""Nationality""' China = `""Country""') ysize(4.5) ///
headings(1 = "Die Roll" 2 = "Cheating"  5 = "Patience" 7 = "Neg. Recip." 10 = "Altruism"  12 = "Pos. Recip"   14 = "Trusting"   16 = "Ethics" , ///
gap(.1)) pstyle(p2)  mcol(black) ciopts(recast(rcap)) ///
 bylabels(q1 q13 q14 q15 q16 q24 q17 q18 q21 q19  q26 q20 q25 q22  q27 q28 q29 q30 q31) 
 
 graph export "four_dummies_outcomes.pdf", as(pdf) replace

************************************ Figure 3: Controls

grstyle set symbolsize  small  
est clear
foreach var of varlist $DEPVARS_C  {
qui regress `var' ibn.lang  $controls_C , noconstant robust
eststo s`var'
}

coefplot (sq1_cen)  ||  (sq13_cen)  || (sq14_cen) || (sq15_cen) || (sq16_cen) || (sq24_cen) ///
		|| (sq17_cen) || (sq18_cen) || (sq21_cen) || (sq19_cen) || (sq26_cen) ///
		|| (sq20_cen) || (sq25_cen)	|| (sq22_cen) || (sq27_cen)	|| (sq28_cen) ///
		|| (sq29_cen) || (sq30_cen) || (sq31_cen) , bycoefs keep($controls_C) ///
		xline(0, lp(dash) lc(plr1)) byopts(col(7) legend(off)) xlabel(-.5(.5).5) xtick(-0.5(.25)0.5, grid) ///
		xtitle("Coeficients and 95% Confidence Intervals, using centred data", size(small )) ///
		ysize(4.5) pstyle(p2)  mcol(black) ciopts(recast(rcap)) ///
		headings(1 = "Die Roll" 2 = "Cheating"  5 = "Patience" 7 = "Neg. Recip." 10 = "Altruism"  12 = "Pos. Recip"   14 = "Trusting"   16 = "Ethics" ///
		, gap(.1)) bylabels(q1 q13 q14 q15 q16 q24 q17 q18 q21 q19  q26 q20 q25 q22  q27 q28 q29 q30 q31)

 graph export "four_dummies_controls.pdf", as(pdf) replace

 esttab s* using temp.csv, replace nostar  // to calculate average magnitudes

************************************ Table  4: Four Effects, with MHT correction

/* 
Note that the graphs above allow us to plot marginal effects in keeping with the relevant comparison from figure 1.  However, we wish to control the false discovery rate, using wyoung (Author: Julian Reif, University of Illinois)  
That doesn't allow margins as an input. So, we need to generate the specific comparison we're interested using regressions where we manually control the comparison. To do this we need to choose the relevant base category. E.g. the first regression excludes the 2nd group (Mandarin) and tests the effect of being in the first (Beijing). Throughout, we test the effect of being 'more Chinese'.
*/

tab lang, gen(langdummy) 

* to quickly work out N (seems odd in wyoung)
foreach OUTCOMEVAR of varlist $DEPVARS_C {
qui reg `OUTCOMEVAR' langdummy1 langdummy3 langdummy4 $controls_C, robust 
di "`OUTCOMEVAR' sample is " `e(N)'
}

* base category=2
qui wyoung $DEPVARS_C , cmd(regress OUTCOMEVAR langdummy1 langdummy3 langdummy4 $controls_C, robust ) familyp(langdummy1) ///
						bootstraps($Nbootstraps) seed(20)
matrix PYtemp=r(table)
matrix PYbeijing = PYtemp[1..19,1],PYtemp[1..19,4]

* base category=3
qui wyoung $DEPVARS_C , cmd(regress OUTCOMEVAR langdummy1 langdummy2 langdummy4 $controls_C, robust ) familyp(langdummy2) ///
						bootstraps($Nbootstraps) seed(20)
matrix PYtemp=r(table)
matrix PYMandarin = PYtemp[1..19,1],PYtemp[1..19,4]

* base category=4
qui wyoung $DEPVARS_C , cmd(regress OUTCOMEVAR langdummy1 langdummy2 langdummy3 $controls_C, robust ) familyp(langdummy3) ///
						bootstraps($Nbootstraps) seed(20)
matrix PYtemp=r(table)
matrix PYChinese = PYtemp[1..19,1],PYtemp[1..19,4]

* base category=4
qui wyoung $DEPVARS_C , cmd(regress OUTCOMEVAR langdummy1 langdummy2 langdummy3 $controls_C, robust ) familyp(langdummy1) ///
						bootstraps($Nbootstraps) seed(20)
matrix PYtemp=r(table)
matrix PYChina = PYtemp[1..19,1],PYtemp[1..19,4]

* data wrangling
matrix coljoinbyname PYoung_all =PYbeijing PYMandarin PYChinese PYChina
matrix colnames PYoung_all =Beijing_b Beijing_p  Mandarin_b  Mandarin_p Chinese_b Chinese_p China_b China_p
matrix rownames PYoung_all =q1 q13 q14 q15 q16 q24 q17 q18 q21 q19  q26 q20 q25 q22  q27 q28 q29 q30 q31
matrix list PYoung_all, f(%9.3f)

* Saves estimates in dataset with b and p prefixes. 
svmat2 double PYoung_all, names(col) r(names) 

* calculations, for text & last line of table
foreach var of varlist Beijing_b Mandarin_b Chinese_b China_b{
gen abs_`var'=abs(`var')
}
sum abs_*
count if abs(Mandarin_b) >abs(Chinese_b)

************************************ Figure 4: Three Correlations Between Effects 
grstyle set size 8pt: axis_title

* six blocks of code below each make one graph, then they're put together. 

* SubGraph 1
pwcorr Beijing_b China_b, sig // this bit of code puts in  the coeficifient, the p value and stars 
matrix b=r(C)
local r_b: di %5.3f b[2,1]
di `r_b'
matrix p=r(sig)
local r_p: di %5.3f p[2,1]
di `r_p'
local stars ","
if (p[2,1]<.05)  local stars "*," 
if (p[2,1]<.01) local stars "**," 
if (p[2,1]<.001) local stars "***," 
twoway (lfit Beijing_b China_b, lpattern(dash) lcol(plr1))  (function y=x, range(-1 1.5) lcol(plb1)) ///
(scatter Beijing_b China_b, mlabstyle(p1) mcol(black) ///
text(1.45 .25 "r=`r_b'`stars' `r_p' ", size(medsmall)  bcolor(white)  just(center) box width(40) lwidth(vvthick)) ),  nodraw name(loc_based, replace) legend(off)  ///
 ytitle("Location Effect") xtitle("Country Effect")

 * SubGraph 2
pwcorr Mandarin_b China_b, sig
matrix b=r(C)
local r_b: di %5.3f b[2,1]
di `r_b'
matrix p=r(sig)
local r_p: di %5.3f p[2,1]
di `r_p'
local stars ","
if (p[2,1]<.05)  local stars "*," 
if (p[2,1]<.01) local stars "**," 
if (p[2,1]<.001) local stars "***," 
twoway (lfit Mandarin_b China_b, lpattern(dash) lcol(plr1)) (function y=x, range(-1 1.5) lcol(plb1)) ///
(scatter Mandarin_b China_b,  mlabstyle(p1) mcol(black)  ///
text(1.45 .25 "r=`r_b'`stars' `r_p' ", size(medsmall)   bcolor(white) just(center) box width(40) lwidth(vvvthick) )), nodraw name(lang_based, replace) legend(off) ///
 ytitle("Language Effect") xtitle("Country Effect")

 * SubGraph 3
pwcorr Chinese_b China_b, sig
matrix b=r(C)
local r_b: di %5.3f b[2,1]
di `r_b'
matrix p=r(sig)
local r_p: di %5.3f p[2,1]
di `r_p'
local stars ","
if (p[2,1]<.05)  local stars "*," 
if (p[2,1]<.01) local stars "**," 
if (p[2,1]<.001) local stars "***," 
twoway (lfit  Chinese_b China_b, lpattern(dash) lcol(plr1)) (function y=x, range(-1 1.5) lcol(plb1)) ///
(scatter  Chinese_b China_b, mlabstyle(p1) mcol(black) mlabp(12) ///
text(1.45 .25 "r=`r_b'`stars' `r_p' ", size(medsmall)   bcolor(white) just(center) box width(40) lwidth(vthick))) , name(slow, replace) legend(off) nodraw ///
 ytitle("Nationality Effect") xtitle("Country Effect")

 * SubGraph 4
pwcorr Beijing_b Chinese_b, sig
matrix b=r(C)
local r_b: di %5.3f b[2,1]
di `r_b'
matrix p=r(sig)
local r_p: di %5.3f p[2,1]
di `r_p'
local stars ","
if (p[2,1]<.05)  local stars "*," 
if (p[2,1]<.01) local stars "**," 
if (p[2,1]<.001) local stars "***," 
twoway (lfit Beijing_b Chinese_b , lpattern(dash) lcol(plr1))  (function y=x, range(-1 1.5) lcol(plb1)) ///
(scatter Beijing_b Chinese_b, mlabstyle(p1) mcol(black) ///
text(1.45 .25 "r=`r_b'`stars' `r_p' ", size(medsmall)  bcolor(white)  just(center) box width(40) lwidth(vvthick)) ), nodraw name(a, replace) legend(off)  ///
 ytitle("Location Effect") xtitle("Nationality Effect")

* SubGraph 5
pwcorr Beijing_b Mandarin_b , sig
matrix b=r(C)
local r_b: di %5.3f b[2,1]
di `r_b'
matrix p=r(sig)
local r_p: di %5.3f p[2,1]
di `r_p'
local stars ","
if (p[2,1]<.05)  local stars "*," 
if (p[2,1]<.01) local stars "**," 
if (p[2,1]<.001) local stars "***," 
twoway (lfit Beijing_b Mandarin_b, lpattern(dash) lcol(plr1)) (function y=x, range(-1 1.5) lcol(plb1)) ///
(scatter Beijing_b Mandarin_b,  mlabstyle(p1) mcol(black)  ///
text(1.45 .25 "r=`r_b'`stars' `r_p' ", size(medsmall)   bcolor(white) just(center) box width(40) lwidth(vvvthick) )), nodraw name(b, replace) legend(off) ///
 ytitle("Location Effect") xtitle("Language Effect")
 
* SubGraph 6
pwcorr Mandarin_b Chinese_b , sig
matrix b=r(C)
local r_b: di %5.3f b[2,1]
di `r_b'
matrix p=r(sig)
local r_p: di %5.3f p[2,1]
di `r_p'
local stars ","
if (p[2,1]<.05)  local stars "*," 
if (p[2,1]<.01) local stars "**," 
if (p[2,1]<.001) local stars "***," 
twoway (lfit   Mandarin_b Chinese_b, lpattern(dash) lcol(plr1)) (function y=x, range(-1 1.5) lcol(plb1)) ///
(scatter   Mandarin_b Chinese_b, mlabstyle(p1) mcol(black) mlabp(12) ///
text(1.45 .25 "r=`r_b'`stars' `r_p' ", size(medsmall)   bcolor(white) just(center) box width(40) lwidth(vthick))) , name(c, replace) legend(off) nodraw ///
 ytitle("Language Effect") xtitle("Nationality Effect")

* put the 6 graphs together, show and save as a pdf
 gr combine  loc_based lang_based slow a b c, ycommon xsize(10) ysize(8) xcommon col(3) scale(1.3)
 graph export "correlation.pdf", as(pdf) replace

 
************************************ Figure 5: Robustness check: Time in the UK
* data prep
egen timeinuk_c=std(timeinuk)
gen chinese=(lang==2)

est clear
	foreach var of varlist $DEPVARS_C  {	
	eststo:	qui regress `var' chinese $controls_C timeinuk_c if lang==2 | lang==3 , noconstant robust 
	}

* then transposing:
esttab , keep(timeinuk_c) scalars(t)
 mat list r(coefs) 
esttab r(coefs, transpose), tex

est clear
	foreach var of varlist $DEPVARS_C  {	
	eststo:	 qui regress `var'  $controls_C chinese##c.timeinuk_c if lang==2 | lang==3 , robust 
	}
	

* subgraph 1
	coefplot  (est1 , aseq(1) ) (est2 , aseq(13)) ///
		 (est3 , aseq(14)) (est4 , aseq(15)) ///
		 (est5 , aseq(16)) (est6 , aseq(24)) ///
		 (est6 , aseq(17)) (est8 , aseq(18)) ///
		 (est8 , aseq(21)) (est10, aseq(19)) ///
		 (est11, aseq(26)) (est12, aseq(20)) ///
		 (est13, aseq(25)) (est14, aseq(22)) ///
		 (est15, aseq(27)) (est16, aseq(28)) ///
		 (est17, aseq(29)) (est18, aseq(30)) ///
		 (est19, aseq(31)) 					///
		 ,  keep( 1.chinese) swapnames  legend(off) ///
		 xline(0, lp(dash) lc(plr1) ) ytitle("Question:") ///
		xlabel(-1(1)1) xtick(-1(.5)1, grid)	 ///
		title("Language", size(small ) ) ///
		nodraw name(gr0, replace) fxsize(100) pstyle(p2)  mcol(black) ciopts(recast(rcap)) 

* subgraph 2
coefplot  (est1 , aseq(1) ) (est2 , aseq(13)) ///
		 (est3 , aseq(14)) (est4 , aseq(15)) ///
		 (est5 , aseq(16)) (est6 , aseq(24)) ///
		 (est6 , aseq(17)) (est8 , aseq(18)) ///
		 (est8 , aseq(21)) (est10, aseq(19)) ///
		 (est11, aseq(26)) (est12, aseq(20)) ///
		 (est13, aseq(25)) (est14, aseq(22)) ///
		 (est15, aseq(27)) (est16, aseq(28)) ///
		 (est17, aseq(29)) (est18, aseq(30)) ///
		 (est19, aseq(31)) 					///
		 ,  keep(timeinuk_c) swapnames  legend(off) ///
		 xline(0, lp(dash) lc(plr1) ) ///
		xlabel(-1(1)1) xtick(-1(.5)1, grid)	 ///
		title("Time in UK", size(small ) ) ///
		nodraw name(gr1, replace) ylabel(none) fxsize(80)  pstyle(p2)  mcol(black) ciopts(recast(rcap))

* subgraph 3
		coefplot  (est1 , aseq(1) ) (est2 , aseq(13)) ///
		 (est3 , aseq(14)) (est4 , aseq(15)) ///
		 (est5 , aseq(16)) (est6 , aseq(24)) ///
		 (est6 , aseq(17)) (est8 , aseq(18)) ///
		 (est8 , aseq(21)) (est10, aseq(19)) ///
		 (est11, aseq(26)) (est12, aseq(20)) ///
		 (est13, aseq(25)) (est14, aseq(22)) ///
		 (est15, aseq(27)) (est16, aseq(28)) ///
		 (est17, aseq(29)) (est18, aseq(30)) ///
		 (est19, aseq(31)) 					///
		 ,  keep(1.chinese#c.timeinuk_c ) swapnames  legend(off) ///
		 xline(0, lp(dash) lc(plr1) ) ///
		xlabel(-1(1)1) xtick(-1(.5)1, grid)	 ///
		title("Interaction Term", size(small ) ) ///
		nodraw name(gr2, replace) ylabel(none) fxsize(80) pstyle(p2)  mcol(black) ciopts(recast(rcap))

* join the subgraphs
gr combine gr0 gr1 gr2, b1("Coefficients and 95% Confidence Intervals, using Centred Data", size(vsmall))  scale(1.6)  col(3) imargin(vsmall) 

graph export "timeinuk.pdf", as(pdf) name("Graph") replace

*** adjusting p values (FWER Holland), in text (not table/figure)
gen pvalue=.
gen beta=.
local i=1
foreach var of varlist  $DEPVARS_C {
eststo:	 qui regress `var'  $controls_C chinese##c.timeinuk_c if lang==2 | lang==3 , robust 
local t = _b[timeinuk_c]/_se[timeinuk_c]
replace pvalue=2*ttail(`e(df_r)',abs(`t')) if _n==`i'
replace beta=_b[timeinuk_c] if _n==`i'
local i=`i'+1
}		 
 qqvalue pvalue, method(holland) qvalue(qvalue_timeinuk)
 list beta pvalue qvalue_timeinuk  in 1/19
 
 
 
************************************ Table  5: Robustness check: Language Proficiency 

matrix Samples=J(17,4,.)

forvalues s=1/4{
	matrix Samples[17,`s']=  0
	gen sample_`s'=1 													// complete
	replace sample_`s'=0 if lang>1 & lang<4 & q6=="hong kong" & `s'==2 	// HK
	replace sample_`s'=0 if lang==3 &  q8_english <4 & `s'==3 			// language
	replace sample_`s'=0 if lang>1 & lang<4 &  q8_english <4 & `s'==4 	// language

	local counter=1 
	matrix Empty`s'=J(19,4,.)
	
	foreach var of varlist $DEPVARS_C  {	
		qui regress `var' ibn.lang $controls_C if sample_`s'==1, noconstant robust 
		qui contrast {lang 1 -1 0 0} {lang 0 1 -1 0} {lang 0 0 1 -1 } {lang 1 0 0 -1 }, pveffects
			forvalues i=1/4 {
				matrix Empty`s'[`counter',`i']=r(b)[1,`i']
				}
	matrix  Samples[17,`s']= Samples[17,`s']+e(N)
	local counter= `counter'+1
	}

	mata : st_matrix("B`s'", colsum(st_matrix("Empty`s'"))) 
	mat li B`s'											// that is the total
}

matrix rownames Samples = Beijing Mandarin Chinese China cor1 p1 cor2 p2 cor3 p3 cor4 p4  cor5 p5   cor6 p6 Ntotal
matrix colnames Samples = All  NoHK Lang_limited Lang_Vlimited 

forvalues s=1/4 {
	svmat double Empty`s', name(averageeffects_`s'_)

	forvalues i=1/4 {
	gen averageeffects_`s'_`i'_magnitude=abs(averageeffects_`s'_`i')
	sum averageeffects_`s'_`i'_magnitude
	matrix Samples[`i',`s']= r(mean)	// to redo the average magnitude bit
	}
	
qui pwcorr averageeffects_`s'_1 averageeffects_`s'_2 averageeffects_`s'_3 averageeffects_`s'_4, sig
	matrix Samples[5,`s']=  r(C)[1,2]
	matrix Samples[6,`s']=  r(sig)[1,2]
	matrix Samples[7,`s']=  r(C)[1,3]
	matrix Samples[8,`s']=  r(sig)[1,3]
	matrix Samples[9,`s']=  r(C)[2,3]
	matrix Samples[10,`s']= r(sig)[2,3]
	matrix Samples[11,`s']= r(C)[1,4]
	matrix Samples[12,`s']= r(sig)[1,4]
	matrix Samples[13,`s']= r(C)[2,4]
	matrix Samples[14,`s']= r(sig)[2,4]
	matrix Samples[15,`s']= r(C)[3,4]
	matrix Samples[16,`s']= r(sig)[3,4]
}

matrix list Samples,format(%8.3f)


************************************  Table  6: Predicting Revealed Preference with Stated Preference
replace pvalue=.
replace beta=.
drop  qvalue
local i=1
foreach var of varlist q13_cen q14_cen q15_cen q28_cen q29_cen q30_cen q31_cen {
qui reg q1 `var', robust
local t = _b[`var']/_se[`var']
replace pvalue=2*ttail(e(df_r),abs(`t')) if _n==`i'
replace beta=_b[`var'] if _n==`i'
local i=`i'+1
}
qqvalue pvalue, method(holland) qvalue(qvalue)
list beta pvalue qvalue in 1/7


************************************ 	 Fin. 

	/*
Thanks for reading - I hope you enjoyed the code. 
Do let me (Paul Clist) know if you find any mistakes or need help replicating the work (noting the prep steps and so forth). Website with email address at the tpo of the do file. 


*/
		 
		 
		 
